In [1]:
#################
### SIR Model ###
#################
import scipy.integrate as spi
import numpy as np
import pylab as pl

beta =  0.1934 # 논문에 나온 beta 값 참조 (2020.02.10 중국논문발표)
# 우리나라도 중국과 마찬가지로 2차 지역감염이 시작되고 전염력이 2/18 당시 매우 높았던것으로 보여 논문의 beta 도입.

gamma =  1/14 # I ->R 회복율 = 평균 회복기간의 역수
t_inc = 1.0 
t_end = 150.0

# 2/18 31번확진자 나온날 기준으로 초기 SIR 모델 만듦.
S0 = int(input('Input the accumulated number of tests: '))
I0 = int(input('Input the accumulated number of positive results: '))
R0 = int(input('Input the accumulated number of releases: '))
# S0  = 9772 ; I0  = 31; R0 = 12
N = S0 + I0 + R0 
S0  = S0 /N  # susceptible hosts
I0  = I0 /N    # infectious hosts
R0 = R0 /N      # recovered hosts

Input = (S0, I0, 0.0)

Input

def simple_SIR(INT, t):
  '''The main set of equation'''
  Y=np.zeros((3))
  X = INT      #  S0,   I0 
  Y[0] = -beta * X[0] * X[1]
  Y[1] = beta*X[0]*X[1]  - gamma * X[1]
  Y[2] = gamma * X[1]
  return Y # for spicy.odeint

t_start =0.0 ; 
t_range = np.arange(t_start, t_end + t_inc, t_inc)
SIR= spi.odeint(simple_SIR, Input, t_range)

pl.figure(figsize=(15,8))
pl.plot(SIR[:, 0], '-g', label='Susceptibles')
pl.plot(SIR[:, 2], '-k', label='Recovereds')
pl.plot(SIR[:, 1], '-r', label='Infectious')
pl.legend(loc=0)
pl.title('Prediction of Simple nCOV-19 SIR model')
pl.xlabel('Time(day)')
pl.ylabel('individuals')
pl.show()
Input the accumulated number of tests: 9772
Input the accumulated number of positive results: 31
Input the accumulated number of releases: 12
In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import os, datetime, math
for dirname, _, filenames in os.walk(r'C:\Users\Jaewoong\Desktop\AIFFEL\hackathon'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.
#####################
### Read CSV file ###
#####################
path = r'C:\\Users\\Jaewoong\\Desktop\\AIFFEL\\hackathon\\'

case = p_info = pd.read_csv(path+'Case.csv')
p_info = pd.read_csv(path+'PatientInfo.csv')
#p_route = pd.read_csv(path+'PatientRoute.csv')

################### Time ###################
# time = pd.read_csv(path+'Time.csv')
time = pd.read_csv(path+'Time_v1.csv')
t_age = pd.read_csv(path+'TimeAge.csv')
t_gender = pd.read_csv(path+'TimeGender.csv')
t_provin = pd.read_csv(path+'TimeProvince.csv')

region = pd.read_csv(path+'Region.csv')
weather = pd.read_csv(path+'Weather.csv')
search = pd.read_csv(path+'SearchTrend.csv')
floating = pd.read_csv(path+'SeoulFloating.csv')
policy = pd.read_csv(path+'Policy.csv')
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Case.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\confirmed2released_date.png
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\PatientInfo.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\plotly_append_to_html.py
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\plotly_result.html
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Policy.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Region.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\SearchTrend.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\SeoulFloating.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\symptom2confirmed_date.png
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Time.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\TimeAge.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\TimeGender.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\TimeProvince.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Time_v1.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Time_v2.csv
C:\Users\Jaewoong\Desktop\AIFFEL\hackathon\Weather.csv
In [3]:
weather['date_int'] = weather['date'].agg(lambda x: int(''.join(x.split('-'))))
prev_weather_idx = weather[weather['date_int'] < 20200120].index
weather_s20200120 = weather.drop(prev_weather_idx)
weather_s20200120 = weather_s20200120.reset_index(drop=True)

fig3 = px.scatter_3d(weather_s20200120,
                    x='date',
                    y='avg_temp',
                    z='avg_relative_humidity',
                    color='province',
                    opacity=0.7,
                    size_max=20)
fig3.update_traces(marker={"opacity": 0.7, "size":2})
fig3.show()
In [4]:
t_provin['date_int'] = t_provin['date'].agg(lambda x: int(''.join(x.split('-'))))
filtered_t_provin_idx = t_provin[t_provin['date_int'] > weather_s20200120.date_int.max()].index
t_provin_e20200629 = t_provin.drop(filtered_t_provin_idx)
t_provin_e20200629 = t_provin_e20200629.reset_index(drop=True)
t_provin_weather = pd.merge(t_provin_e20200629, weather_s20200120, on=['date', 'province', 'date_int'], how='left').fillna(0)
In [5]:
pd_tmp = p_info[p_info.symptom_onset_date.isnull() == False]
pd_tmp2 = pd_tmp[pd_tmp.released_date.isnull() == False]
pd_tmp3 = pd_tmp2[pd_tmp2['symptom_onset_date'] != ' ']
pd_tmp3 = pd_tmp3.reset_index(drop=True)

format = '%Y-%m-%d'
# dt_datetime2= datetime.datetime.strptime(datetime_str,format)
pd_tmp3['symptom2confirmed_date'] = pd_tmp3.apply(lambda x: datetime.datetime.strptime(x['confirmed_date'],format)-datetime.datetime.strptime(x['symptom_onset_date'], format), axis='columns')
pd_tmp3['symptom2confirmed_date'] = pd_tmp3['symptom2confirmed_date'].dt.days
sns.histplot(data=pd_tmp3, x="symptom2confirmed_date", kde=True)
plt.title("Date from symptom onset to confirmed")
plt.show()


pd_tmp3['confirmed2released_date'] = pd_tmp3.apply(lambda x: datetime.datetime.strptime(x['released_date'],format)-datetime.datetime.strptime(x['confirmed_date'], format), axis='columns')
pd_tmp3['confirmed2released_date'] = pd_tmp3['confirmed2released_date'].dt.days
sns.histplot(data=pd_tmp3, x="confirmed2released_date", kde=True)
plt.title("Date from confirmed to released")
plt.show()
print(pd_tmp3.describe())
         patient_id  symptom2confirmed_date  confirmed2released_date
count  1.680000e+02              168.000000               168.000000
mean   3.362595e+09                5.970238                25.625000
std    1.731598e+09                8.760571                12.210081
min    1.000000e+09               -8.000000                 4.000000
25%    1.700000e+09                1.750000                17.000000
50%    4.100000e+09                4.000000                24.000000
75%    4.100000e+09                6.000000                34.000000
max    6.100000e+09               48.000000                72.000000
In [6]:
def cal_diff_square(x):
    diff_square = (x['real_Rt']-x['cal_Rt'])**2
    return diff_square
for alpha in [4,5,6,7,8,9]:
    pd_time = time.copy()
    diff_confirmed = pd_time.confirmed
    pd_time['today_confirmed'] = pd_time.confirmed.diff().fillna(0)
    pd_time[f'after{alpha}_confirmed'] = pd_time.today_confirmed.shift(-alpha).fillna(0)
    pd_time = pd_time[:-alpha]
    
    def calculate_Rt(x):
        cal_Rt = 0
        try:
            cal_Rt = min(x[f'after{alpha}_confirmed']/x['today_confirmed'],10)
        except ZeroDivisionError as e:
            # print(e, f':  No confirmed people in {x.date}')
            pass
        return cal_Rt
    
    pd_time['cal_Rt'] = pd_time.apply(calculate_Rt, axis='columns')
    pd_time['diff_square'] = pd_time.apply(cal_diff_square, axis='columns')
    rmse = math.sqrt(pd_time['diff_square'].mean())
    print(f'{alpha = }, RMSE =',rmse)
alpha = 4, RMSE = 2.039542928814707
alpha = 5, RMSE = 2.064957850100958
alpha = 6, RMSE = 1.9795704775482634
alpha = 7, RMSE = 1.9588924454009522
alpha = 8, RMSE = 2.084549308294982
alpha = 9, RMSE = 2.149522723124322
In [7]:
alpha=7

pd_time = time.copy()
diff_confirmed = pd_time.confirmed
pd_time['today_confirmed'] = pd_time.confirmed.diff().fillna(0)
pd_time[f'after{alpha}_confirmed'] = pd_time.today_confirmed.shift(-alpha).fillna(0)
pd_time = pd_time[:-alpha]
def calculate_Rt(x):
    cal_Rt = 0
    try:
        cal_Rt = min(x[f'after{alpha}_confirmed']/x['today_confirmed'],10) # Rt clipping 후처리
        # px.line(pd_time, x='date', y='today_confirmed') # 대구집단 감염 outlier를 원인으로 판단
    except ZeroDivisionError as e:
        # print(e, f':  No confirmed people in {x.date}')
        pass
    return cal_Rt
pd_time['cal_Rt'] = pd_time.apply(calculate_Rt, axis='columns')

fig6 = go.Figure()
fig6.add_trace(go.Scatter(x=pd_time.date, y=pd_time.cal_Rt,
                    mode='lines',
                    name='Calculated Rt'))
fig6.add_trace(go.Scatter(x=pd_time.date, y=pd_time.real_Rt,
                    mode='lines',
                    name='Real Rt'))
fig6.update_layout(
    title="Reproduction number Rt",
    xaxis_title="Date",
    yaxis_title="Rt",)
In [8]:
i = 'Seoul'
pd_provin = t_provin_weather[t_provin_weather['province'] == i]
fig5 = px.scatter_matrix(pd_provin,
                        dimensions=["confirmed", "released", "avg_temp", "most_wind_direction", "avg_relative_humidity"],
                        color='province', title=i)
fig5.update_traces(diagonal_visible=False)
fig5.update_traces(marker={"opacity": 0.7, "size":5})
fig5.update_layout(autosize=False, width=1000, height=800)
# fig5.write_html(f'{i} scatter graph')
fig5.show()